In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
from sklearn import datasets, linear_model
from scipy.stats import linregress
from numpy.linalg import inv
import statsmodels.discrete.discrete_model as sm
from numpy.linalg import eigvals
import numpy.linalg as LA

%matplotlib inline

def set_data(p, x):
    """Build the design matrix for an AR(p) regression on the series x."""
    temp = x.flatten()
    n = len(temp[p:])                       # number of usable samples
    x_T = temp[p:].reshape((n, 1))          # targets: x(t) for t = p..end
    X_p = np.ones((n, p + 1))               # first column is the constant term
    for i in range(1, p + 1):
        X_p[:, i] = temp[i - 1: i - 1 + n]  # column i is the lag x(t - (p - i + 1))
    return X_p, x_T
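
As a quick illustration (a sketch, not part of the original notebook), applying `set_data` with $p = 2$ to a tiny series shows the layout of the design matrix: a constant column followed by the lagged values, paired with the current value as target.

In [ ]:
X_demo, y_demo = set_data(2, np.arange(6))
print(X_demo)   # columns: constant, x(t-2), x(t-1)
print(y_demo)   # targets:  x(t) for t = 2..5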

Task 1


In [2]:
data = sio.loadmat('Tut3_file1.mat')
DLPFC = data['DLPFC']
x = DLPFC[0, :]
plt.plot(DLPFC.T)


Out[2]:
[Figure: the two DLPFC time series plotted over time]

In [43]:
x = DLPFC[0]
X_p, x_T = set_data(3, x)

In [44]:
res = LA.lstsq(X_p, x_T)   # least-squares fit: res[0] holds the four AR(3) coefficients

In [69]:
eps = x_T - np.dot(X_p, res[0])                     # residuals of the AR(3) fit
XpXp = np.diag(LA.inv(np.dot(X_p.T, X_p)))          # diagonal of (X'X)^(-1)
sdeps = np.std(eps, axis=0, ddof=0)                 # residual standard deviation
tstat = res[0].flatten() / (sdeps * np.sqrt(XpXp))  # t-statistics of the coefficients
err = sdeps * np.sqrt(XpXp)                         # standard errors of the coefficients
err


Out[69]:
array([ 0.0363071 ,  0.05266018,  0.08164755,  0.05291614])

The t-statistic measures how far each parameter estimate is from the null hypothesis of zero, in units of standard errors. For $a_0$ and $a_1$ we cannot reject the null hypothesis, whereas for $a_2$ and $a_3$ it is highly unlikely that the estimates arise from random fluctuations alone.
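
To make the comparison with the statsmodels summary below explicit, the hand-computed t-statistics can be converted into two-sided p-values (a sketch, not in the original notebook; it uses the `err` and `res` variables from the cell above).

In [ ]:
from scipy.stats import t as t_dist
tstat = res[0].flatten() / err             # hand-computed t-statistics
dof = len(x_T) - X_p.shape[1]              # residual degrees of freedom: 357 - 4
pvals = 2 * t_dist.sf(np.abs(tstat), dof)  # two-sided p-values
pvals                                      # should match the P>|t| column below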


In [53]:
import statsmodels.api as sm
ols = sm.OLS(x_T, X_p).fit()
ols.summary()


Out[53]:
OLS Regression Results
----------------------
Dep. Variable:      y                 R-squared:            0.832
Model:              OLS               Adj. R-squared:       0.831
Method:             Least Squares     F-statistic:          584.5
Date:               Fri, 24 Nov 2017  Prob (F-statistic):   1.63e-136
Time:               14:49:17          Log-Likelihood:       -371.95
No. Observations:   357               AIC:                  751.9
Df Residuals:       353               BIC:                  767.4
Df Model:           3
Covariance Type:    nonrobust

          coef     std err        t      P>|t|    [0.025    0.975]
const   -0.0029     0.037     -0.078    0.938    -0.075     0.069
x1       0.0263     0.053      0.497    0.619    -0.078     0.130
x2      -0.4109     0.082     -5.005    0.000    -0.572    -0.249
x3       1.2459     0.053     23.412    0.000     1.141     1.351

Omnibus:          5.333   Durbin-Watson:      1.991
Prob(Omnibus):    0.070   Jarque-Bera (JB):   5.224
Skew:             0.236   Prob(JB):           0.0734
Kurtosis:         3.359   Cond. No.           7.69

The likelihood function is $f(Y) = \prod^n_{i=1}\frac{1}{\sqrt{2\pi}\,\sigma}\exp\left(-\frac{1}{2\sigma^2}\left(Y_i - a_0 - a_1 X_{i1} - a_2 X_{i2} - a_3 X_{i3}\right)^2\right)$, so the log-likelihood is $-n\ln(\sqrt{2\pi}\,\sigma) - \frac{1}{2\sigma^2}\sum^n_{i=1}\left(Y_i - a_0 - a_1 X_{i1} - a_2 X_{i2} - a_3 X_{i3}\right)^2$. The resulting value for our fit is reported in the summary of the previous cell.
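
As a quick check (a sketch, not part of the original notebook), the log-likelihood can be evaluated directly from the residuals of the fit and compared with the value reported by statsmodels (`ols.llf`, about -371.95).

In [ ]:
n = len(x_T)
resid = x_T.flatten() - np.dot(X_p, res[0].flatten())
sigma2 = np.mean(resid ** 2)                       # ML estimate of the noise variance
loglik = -0.5 * n * np.log(2 * np.pi * sigma2) - np.sum(resid ** 2) / (2 * sigma2)
loglik, ols.llf                                    # both should be approximately -371.95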


In [70]:
ols.params, res[0].flatten()   # statsmodels and lstsq coefficients agree
ols.HC1_se                     # heteroskedasticity-robust (HC1) standard errors


Out[70]:
array([ 0.03660203,  0.05112009,  0.0826645 ,  0.05439778])

Task 2

We perform the VAR(1) regression in the same way as in the previous case, now using all four time series; the model is sketched below.
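
With $x_t \in \mathbb{R}^4$ stacking the two DLPFC and the two Parietal signals at time $t$, the VAR(1) model being fitted is

$$x_t = c + A\,x_{t-1} + \varepsilon_t, \qquad \varepsilon_t \sim \mathcal{N}(0, \Sigma),$$

and each of the four regressions below estimates one row of the augmented coefficient matrix $(c \mid A)$.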


In [18]:
# Regressor matrix: a constant row plus the four channels (2 DLPFC + 2 Parietal)
X = np.ones((5, len(x)))
X[1:3, :] = DLPFC
X[3:, :] = data['Parietal']

A = np.zeros((4, 5))     # VAR(1) coefficients, one row per channel
tv = np.zeros((4, 5))    # p-values of the coefficients
X_p = X[:, :-1].T        # regressors: constant and all channels at time t-1
for i in range(1, 5):
    X_T = X[i, 1:].reshape((359, 1))   # channel i at time t
    ols = sm.OLS(X_T, X_p).fit()
    A[i - 1, :] = ols.params
    tv[i - 1, :] = ols.pvalues

We can see that the off-diagonal terms are much smaller than the diagonal terms, which shows that each channel is influenced far more by its own past than by the other channels.


In [20]:
A[:, :]


Out[20]:
array([[ 0.00249032,  0.90668588, -0.02035133,  0.01568924, -0.0337574 ],
       [-0.00689996,  0.00975185,  0.89853244, -0.00518405, -0.00724408],
       [ 0.00286988, -0.01223052,  0.04526162,  0.87266512,  0.00158487],
       [ 0.03277565,  0.15398158, -0.09797983,  0.01444267,  0.93634098]])

In [29]:
# Fit all four channels in one least-squares call: res[0] is the (5, 4) coefficient matrix
X_T = X[1:, 1:].T.reshape((359, 4))

res = LA.lstsq(X_p, X_T)
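
As a sanity check (a sketch, not in the original notebook), the stacked least-squares solution should reproduce the row-by-row OLS coefficients computed above.

In [ ]:
np.allclose(res[0].T, A)   # the (5, 4) lstsq solution, transposed, equals A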

To check whether the process is stationary, we verify that the eigenvalues of the lag-coefficient matrix lie inside the unit circle. They do; the check is sketched in the next cell.
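
A sketch of the stationarity check (not in the original notebook): the lag-coefficient matrix is the 4×4 block of A without the intercept column.

In [ ]:
lam = eigvals(A[:, 1:])                 # eigenvalues of the 4x4 lag-coefficient matrix
np.abs(lam), np.all(np.abs(lam) < 1)    # all moduli should be below 1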


In [33]:
res[0].shape


Out[33]:
(5, 4)

The log-likelihood of this model is the multivariate analogue of the expression above: $-\frac{n}{2}\left(k\ln(2\pi) + \ln|\Sigma|\right) - \frac{1}{2}\sum_{t}\varepsilon_t^\top \Sigma^{-1}\varepsilon_t$, where $k = 4$ is the number of channels and $\Sigma$ is the covariance of the residuals $\varepsilon_t$.


In [36]:
eps = X_T - np.dot(X_p, res[0])   # VAR(1) residuals, one column per channel
sdeps = np.std(eps, axis=0)       # residual standard deviation of each channel
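
Using these residuals, the multivariate log-likelihood above can be evaluated at the ML estimate of the residual covariance (a sketch, not in the original notebook).

In [ ]:
n_obs, k = eps.shape
Sigma = np.dot(eps.T, eps) / n_obs     # ML estimate of the residual covariance
sign, logdet = LA.slogdet(Sigma)       # log-determinant of Sigma
loglik_var = -0.5 * n_obs * (k * np.log(2 * np.pi) + logdet + k)
loglik_var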

In [39]:
sdeps

Out[39]:
array([ 0.73474227,  0.66397843,  1.03441867,  1.15650573])

In [41]:
XpXp = LA.inv(np.dot(X_p.T, X_p))   # (X'X)^{-1}, needed for the VAR coefficient standard errors
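
With this matrix, the standard errors of the VAR(1) coefficients can be assembled exactly as in Task 1 (a sketch completing the unfinished cell: each entry is a channel's residual standard deviation times the square root of the corresponding diagonal element of $(X_p^\top X_p)^{-1}$).

In [ ]:
se_var = np.outer(sdeps, np.sqrt(np.diag(XpXp)))   # shape (4, 5), aligned with A
se_var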
